# -*- coding:utf-8 -*-
# @Author: shenyuyu
# @Time: 2023/6/27 16:06
# @File: test.py
from pyspark import SparkConf, SparkContext
from jieba import cut_for_search
# Runs Spark in local mode (no cluster required).
if __name__ == '__main__':
    # Configure Spark: application name "wc", local master using all available cores.
    conf = SparkConf().setAppName("wc").setMaster("local[*]")
    sc = SparkContext(conf=conf)

    # Read the Sogou query log; each line is split into tab-separated fields,
    # so the RDD holds one list of fields per line.
    rdd = sc.textFile("file:///tmp/pycharm_project_161/data/SogouQ.txt").map(lambda x: x.split("\t"))

    print(rdd.collect())

    def jieba_result(record):
        """Tokenize the query text (3rd field) with jieba's search-engine mode.

        Materialize the generator into a list so the result is a plain,
        serializable sequence for Spark's flatMap.
        """
        return list(cut_for_search(record[2]))

    # Flatten the per-record token lists into a single RDD of individual words.
    rdd1 = rdd.flatMap(jieba_result)

    print(rdd1.collect())

    # Release Spark resources before the script exits.
    sc.stop()
